from pyexpat import features
import requests
from bs4 import BeautifulSoup
import re
import json
import pandas as pd

# Module-level URL of the www.welan.com page (path /c1231/).
# NOTE(review): nothing in the visible code reads this global -- dealtext()
# takes its own `url` parameter, which shadows this name inside the function --
# so it is presumably passed in by a caller elsewhere in the file; confirm.
url = 'https://www.welan.com/c1231/'


def dealtext(url):
    graph_lst = []
    detail_href = []
    name_lst = []
    editor_lst = []
    price_lst = []
    press_lst = []
    ISBN_lst = []
    date_lst = []
    dic = {}
    useragent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64;rv:57.0) Gecko/20100101 Firefox/57.0'
    http_headers = {'User-Agent': useragent, 'Accept': 'text/html'}
    proxies = {'http': 'http://8.8.8.8:8888', 'https': 'http://4.4.4.4:8444'}
    worker_session = requests.Session()
    log_in_result = worker_session.post(url, headers=http_headers)
    _cookies = log_in_result.cookies
    content = worker_session.get(url, headers=http_headers, cookies=_cookies)
    soup = BeautifulSoup(content.text)
    graph = soup.select('div.sellers-img>a>img')  # 图片爬取
    graph_lst.extend(re.findall(r'http://[a-zA-Z0-9\/\.]*.jpg', str(graph)))
    # print(graph_lst)
    detail = soup.find_all(class_="sellers-detail")  # 书本链接爬取
    detail_href.extend(re.findall(r'https://[a-zA-Z0-9\/\.]*', str(detail)))
    # print(detail_href)
    '''for i in range(len(detail_href)):
        text=requests.get(detail_href[i],'lxml')
        soup1=BeautifulSoup(text.text)
        price=soup1.select('span.old-price')
        print(soup1)'''
    for price in soup.find_all('span', class_='red'):  # 价格爬取
        price_lst.append(price.text)
    print(price_lst)
    for name in soup.find_all(class_='sellers-detail'):  # 书名爬取
        name_lst.append(name.text)
    print(name_lst)
    lst = []
    for i in range(len(name_lst)):  # 合并
        dic[name_lst[i]] = [detail_href[i], graph_lst[i], price_lst[i]]
        lst.append([name_lst[i][0:6], detail_href[i], graph_lst[i], eval(price_lst[i][1:])])
    print(dic)
    print(lst)
    data = pd.DataFrame(lst)
    data.columns = ['书本内容介绍', '书本链接', '书本图片链接', '书本价格']
    print(data)
    data.to_csv('top.csv', index=False)
    fp = open('top.txt', 'w', encoding='utf-8')